/*
 * Copyright 2015-2016 Red Hat, Inc. and/or its affiliates and other contributors as indicated by the @author tags.
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by
 * applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

package org.hawkular.datamining.forecast.stats;

import java.util.Arrays;
import java.util.EnumMap;
import java.util.Map;

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression;
import org.hawkular.datamining.forecast.utils.TimeSeriesDifferencing;
import org.hawkular.datamining.forecast.utils.TimeSeriesLag;

/** Augmented Dickey-Fuller test
 * <p>
 * Null hypothesis H0: Time series contains unit root (is non stationary)
 * <p>
 * Implemented variants:
 * <ul>
 * <li>c - with constant (should be used for testing trend stationary)</li>
 * <li>ct - with constant and time trend</li>
 * <li>nc - no constant</li>
 * </ul>
 * <p>
 * Equations for ADF test
 * <ul>
 * <li>yDiff = alpha + beta*t + gamma*yLag + sum(deltaLagT*yLagDiff)</li>
 * <li>dfStatistics = gammaHat/standardError(gammaHat)</li>
 * </ul>
 * <p>
 * Statistical tables adapted from:
 * <a href="https://github.com/statsmodels/statsmodels/blob/master/statsmodels/tsa/stattools.py">Python statsmodels</a>
 *
 * @author Pavol Loffay */
public class AugmentedDickeyFullerTest {

	/** ADF test variants */
	public enum Type {
		/** nc (no constant) */
		NoInterceptNoTimeTrend,
		/** c (constant) */
		InterceptNoTimeTrend,
		/** ct (constant trend) */
		InterceptTimeTrend
	}

	// number of lagged difference terms included in the test regression
	private final int maxLag;
	// deterministic terms (intercept / time trend) included in the regression
	private final Type type;

	// test statistic, computed eagerly in the constructor
	private Double adfStat;
	// MacKinnon approximate p-value, computed lazily by pValue() and cached
	private Double pValue;

	/** Default constructor for testing trend stationary time series. Type is set to InterceptNoTimeTrend (in R 'c' -
	 * constant)
	 *
	 * @param x time series to test
	 * @param maxLag number of lagged difference terms, non negative */
	public AugmentedDickeyFullerTest(double[] x, int maxLag) {
		this(x, maxLag, Type.InterceptNoTimeTrend);
	}

	/** @param x time series to test
	 * @param maxLag number of lagged difference terms, non negative
	 * @param type variant of the test regression
	 * @throws IllegalArgumentException if x is null or maxLag is negative */
	public AugmentedDickeyFullerTest(double[] x, int maxLag, Type type) {
		if (x == null) {
			throw new IllegalArgumentException("Time series should not be null");
		}
		if (maxLag < 0) {
			throw new IllegalArgumentException("maxLag should be non negative, got " + maxLag);
		}
		this.maxLag = maxLag;
		this.type = type;

		calculateStatistics(x);
	}

	/** @return variant of the performed test */
	public Type getType() {
		return type;
	}

	/** @return MacKinnon's approximate p-value for the computed test statistic; computed on first call and cached */
	public double pValue() {
		if (pValue == null) {
			// n = 1 for a plain (augmented) Dickey-Fuller test
			pValue = macKinnonPValue(adfStat, 1);
		}

		return pValue;
	}

	/** @return ADF test statistic, gammaHat / standardError(gammaHat) */
	public double statistics() {
		return adfStat;
	}

	/** Fits the ADF regression yDiff = (alpha) + (beta*t) + gamma*yLag + sum(deltaLagT*yLagDiff) and stores
	 * the Dickey-Fuller statistic gammaHat / standardError(gammaHat).
	 *
	 * @param x time series to test */
	private void calculateStatistics(double[] x) {
		double[] differences = TimeSeriesDifferencing.differencesAtLag(x, 1);

		// the first maxLag differences are only available as lagged regressors, so they are dropped here
		double[] dependentVariable = Arrays.copyOfRange(differences, maxLag, differences.length);
		double[][] explanatoryVariables = explanatoryVariables(x, differences);

		// OLS model; the intercept is estimated by the library unless the 'nc' variant was requested
		OLSMultipleLinearRegression olsMultipleLinearRegression = new OLSMultipleLinearRegression();
		olsMultipleLinearRegression.setNoIntercept(type == Type.NoInterceptNoTimeTrend);
		olsMultipleLinearRegression.newSampleData(dependentVariable, explanatoryVariables);

		double[] parameters = olsMultipleLinearRegression.estimateRegressionParameters();
		double[] standardErrors = olsMultipleLinearRegression.estimateRegressionParametersStandardErrors();

		// first parameter is the intercept (when present), the next one is gamma (coefficient of the lagged series)
		int gammaIndex = (type == Type.NoInterceptNoTimeTrend) ? 0 : 1;
		adfStat = parameters[gammaIndex] / standardErrors[gammaIndex];
	}

	/** Builds the regressor matrix: lagged series, optional time trend and maxLag lagged differences.
	 *
	 * @param x original time series
	 * @param differences first differences of x (one element shorter than x)
	 * @return matrix with one row per observation, as expected by commons-math OLS */
	private double[][] explanatoryVariables(double[] x, double[] differences) {

		// lagged level y_{t-1}; differences.length == x.length - 1, so the last observation is dropped
		double[] xt = Arrays.copyOfRange(x, maxLag, differences.length);

		// xt + (optional) timeTrend + maxLag lagged differences
		int variablesSize = (type == Type.InterceptTimeTrend ? 2 : 1) + maxLag;
		double[][] explanatory = new double[variablesSize][];

		explanatory[0] = xt;
		if (type == Type.InterceptTimeTrend) {
			explanatory[1] = timeTrend(maxLag + 1, differences.length);
		}

		// index 0 is skipped — presumably lag(...)[0] holds the contemporaneous series; TODO confirm in TimeSeriesLag
		double[][] laggedDifferences = TimeSeriesLag.lag(differences, maxLag + 1);
		for (int i = 1; i < laggedDifferences.length; i++) {
			explanatory[variablesSize - maxLag + (i - 1)] = Arrays.copyOf(laggedDifferences[i],
					laggedDifferences[i].length);
		}

		// commons-math expects observations in rows and variables in columns
		return transpose(explanatory);
	}

	/** Creates the inclusive arithmetic sequence used as the deterministic time trend regressor.
	 *
	 * @param start first value of the trend
	 * @param end last value of the trend (inclusive)
	 * @return array {start, start + 1, ..., end} */
	private double[] timeTrend(int start, int end) {
		double[] result = new double[end - start + 1];

		for (int i = 0; i < result.length; i++) {
			result[i] = i + start;
		}

		return result;
	}

	/** @param arr rectangular matrix
	 * @return transposed copy of arr */
	private double[][] transpose(double[][] arr) {
		double[][] result = new double[arr[0].length][arr.length];

		for (int row = 0; row < arr.length; row++) {
			for (int column = 0; column < arr[0].length; column++) {
				result[column][row] = arr[row][column];
			}
		}

		return result;
	}

	/** Returns MacKinnon's approximate p-value for the given test statistic.
	 * <p>
	 * MacKinnon, J.G. 1994 "Approximate Asymptotic Distribution Functions for Unit-Root and Cointegration Tests."
	 * Journal of Business & Economic Statistics, 12.2, 167-76.
	 *
	 * @param testStat "T-value" from an Augmented Dickey-Fuller regression.
	 * @param n the number of series believed to be I(1); for (Augmented) Dickey-Fuller n = 1.
	 * @return The p-value for the ADF statistic using MacKinnon 1994. */
	private double macKinnonPValue(double testStat, int n) {
		// outside the tabulated range the p-value saturates at 1 or 0
		double[] maxStat = ADF_TAU_MAX.get(type);
		if (testStat > maxStat[n - 1]) {
			return 1.0;
		}
		double[] minStat = ADF_TAU_MIN.get(type);
		if (testStat < minStat[n - 1]) {
			return 0.0;
		}

		// tau-star selects between the small-p and large-p polynomial approximations
		double[] starStat = ADF_TAU_STAR.get(type);
		double[] tauCoef = testStat <= starStat[n - 1] ? ADF_TAU_SMALLP.get(type)[n - 1]
				: ADF_TAU_LARGEP.get(type)[n - 1];

		return new NormalDistribution().cumulativeProbability(polyval(tauCoef, testStat));
	}

	/** Evaluates a polynomial by Horner's scheme.
	 *
	 * @param coefs polynomial coefficients ordered from the lowest degree, i.e. coefs[0] is the constant term
	 * @param x point to evaluate the polynomial at
	 * @return value of the polynomial at x */
	private double polyval(double[] coefs, double x) {
		double result = 0;

		for (int i = coefs.length - 1; i >= 0; i--) {
			result = result * x + coefs[i];
		}

		return result;
	}

	// statistical tables adapted from Python statsmodels (tsa/stattools.py), indexed by test variant;
	// the outer index of the small-p/large-p tables is n - 1 (number of I(1) series)
	private static final Map<Type, double[]> ADF_TAU_MAX = new EnumMap<>(Type.class);
	private static final Map<Type, double[]> ADF_TAU_MIN = new EnumMap<>(Type.class);
	private static final Map<Type, double[]> ADF_TAU_STAR = new EnumMap<>(Type.class);
	private static final Map<Type, double[][]> ADF_TAU_SMALLP = new EnumMap<>(Type.class);
	private static final Map<Type, double[][]> ADF_TAU_LARGEP = new EnumMap<>(Type.class);
	private static final double[] ADF_LARGE_SCALING = { 1.0, 1e-1, 1e-1, 1e-2 };

	static {
		ADF_TAU_MAX.put(Type.NoInterceptNoTimeTrend, new double[] { 1.51, 0.86, 0.88, 1.05, 1.24 });
		ADF_TAU_MAX.put(Type.InterceptNoTimeTrend, new double[] { 2.74, 0.92, 0.55, 0.61, 0.79, 1 });
		ADF_TAU_MAX.put(Type.InterceptTimeTrend, new double[] { 0.7, 0.63, 0.71, 0.93, 1.19, 1.42 });

		ADF_TAU_MIN.put(Type.NoInterceptNoTimeTrend, new double[] { -19.04, -19.62, -21.21, -23.25, -21.63, -25.74 });
		ADF_TAU_MIN.put(Type.InterceptNoTimeTrend, new double[] { -18.83, -18.86, -23.48, -28.07, -25.96, -23.27 });
		ADF_TAU_MIN.put(Type.InterceptTimeTrend, new double[] { -16.18, -21.15, -25.37, -26.63, -26.53, -26.18 });

		ADF_TAU_STAR.put(Type.NoInterceptNoTimeTrend, new double[] { -1.04, -1.53, -2.68, -3.09, -3.07, -3.77 });
		ADF_TAU_STAR.put(Type.InterceptNoTimeTrend, new double[] { -1.61, -2.62, -3.13, -3.47, -3.78, -3.93 });
		ADF_TAU_STAR.put(Type.InterceptTimeTrend, new double[] { -2.89, -3.19, -3.50, -3.65, -3.80, -4.36 });

		ADF_TAU_SMALLP.put(Type.NoInterceptNoTimeTrend,
				new double[][] { new double[] { 0.6344, 1.2378, 3.2496 * 1e-2 },
						new double[] { 1.9129, 1.3857, 3.5322 * 1e-2 }, new double[] { 2.7648, 1.4502, 3.4186 * 1e-2 },
						new double[] { 3.4336, 1.4835, 3.19 * 1e-2 }, new double[] { 4.0999, 1.5533, 3.59 * 1e-2 },
						new double[] { 4.5388, 1.5344, 2.9807 * 1e-2 } });
		ADF_TAU_SMALLP.put(Type.InterceptNoTimeTrend,
				new double[][] { new double[] { 2.1659, 1.4412, 3.8269 * 1e-2 },
						new double[] { 2.92, 1.5012, 3.9796 * 1e-2 }, new double[] { 3.4699, 1.4856, 3.164 * 1e-2 },
						new double[] { 3.9673, 1.4777, 2.6315 * 1e-2 }, new double[] { 4.5509, 1.5338, 2.9545 * 1e-2 },
						new double[] { 5.1399, 1.6036, 3.4445 * 1e-2 } });
		ADF_TAU_SMALLP.put(Type.InterceptTimeTrend,
				new double[][] { new double[] { 3.2512, 1.6047, 4.9588 * 1e-2 },
						new double[] { 3.6646, 1.5419, 3.6448 * 1e-2 }, new double[] { 4.0983, 1.5173, 2.9898 * 1e-2 },
						new double[] { 4.5844, 1.5338, 2.8796 * 1e-2 }, new double[] { 5.0722, 1.5634, 2.9472 * 1e-2 },
						new double[] { 5.53, 1.5914, 3.0392 * 1e-2 } });

		ADF_TAU_LARGEP.put(Type.NoInterceptNoTimeTrend, new double[][] {
				new double[] { 0.4797, 9.3557, -0.6999, 3.3066 }, new double[] { 1.5578, 8.558, -2.083, -3.3549 },
				new double[] { 2.2268, 6.8093, -3.2362, -5.4448 }, new double[] { 2.7654, 6.4502, -3.0811, -4.4946 },
				new double[] { 3.2684, 6.8051, -2.6778, -3.4972 }, new double[] { 3.7268, 7.167, -2.3648, -2.8288 }, });
		ADF_TAU_LARGEP.put(Type.InterceptNoTimeTrend, new double[][] {
				new double[] { 1.7339, 9.3202, -1.2745, -1.0368 }, new double[] { 2.1945, 6.4695, -2.9198, -4.2377 },
				new double[] { 2.5893, 4.5168, -3.6529, -5.0074 }, new double[] { 3.0387, 4.5452, -3.3666, -4.1921 },
				new double[] { 3.5049, 5.2098, -2.9158, -3.3468 }, new double[] { 3.9489, 5.8933, -2.5359, -2.721 }, });
		ADF_TAU_LARGEP.put(Type.InterceptTimeTrend, new double[][] { new double[] { 2.5261, 6.1654, -3.7956, -6.0285 },
				new double[] { 2.85, 5.272, -3.6622, -5.1695 }, new double[] { 3.221, 5.255, -3.2685, -4.1501 },
				new double[] { 3.652, 5.9758, -2.7483, -3.2081 }, new double[] { 4.0712, 6.6428, -2.3464, -2.546 },
				new double[] { 4.4735, 7.1757, -2.0681, -2.1196 }, });

		// large-p tables are stored unscaled; apply the per-degree scaling factors once at class load
		for (double[][] values : ADF_TAU_LARGEP.values()) {
			for (double[] coefs : values) {
				for (int j = 0; j < coefs.length; j++) {
					coefs[j] *= ADF_LARGE_SCALING[j];
				}
			}
		}
	}
}
